from lxml import etree



# tree = etree.parse('123.html',parser=etree.HTMLParser())

# with open('123.html','r',encoding='utf-8') as f:
#     tree = etree.parse(f,parser=etree.HTMLParser())
#     print(type(tree))

# etree.parse uses XMLParser by default
# tree = etree.parse('123.html')
# print(type(tree))
# tree = etree.parse('123.html',parser=etree.HTMLParser())
# print(type(tree))


# Sample HTML document used as the parser input below: a <title> inside
# <head>, and a <body> holding some free text plus a three-item <ul>.
# Every tag in it is explicitly closed.
text = '''
<!DOCTYPE html>
<html lang="en">
<head>
 
    <title>Title</title>
</head>
<body>
    hello lxml
    <ul>
        <li>1</li>
        <li>2</li>
        <li>3</li>

    </ul>
</body>
</html>
'''
# XMLParser strictly follows the XML rules: every tag has to be closed, so
# markup with an unclosed tag (a bare <meta>, for instance) makes it raise.
# Swap in the HTMLParser line below for lenient HTML parsing.
strict_parser = etree.XMLParser()
tree = etree.fromstring(text, parser=strict_parser)
# tree = etree.fromstring(text, parser=etree.HTMLParser())

# print(tree.tag)
# print(tree.text)
# print(tree.attrib)

# Both paths are resolved relative to the element returned by fromstring.
title_element = tree.find('head/title')
title = title_element.text
print(title)

lis = tree.findall('body/ul/li')
for list_item in lis:
    print(list_item.text)



